gvc_agora_opentargets

Setup environment

library(tidyverse)
library(janitor)
library(broom)
library(readxl)
library(jsonlite)

library(gprofiler2)

# Use the black-and-white ggplot2 theme as the default for plots.
theme_set(theme_bw())

# Fix the RNG seed so the random row shuffles (sample_frac) applied before
# ranking genes further down are reproducible between runs.
set.seed(666)

Read and prep data

GVC

Genes within a 1 Mb window of the GVC loci, provided by Fanny (TODO: confirm whether the window is 1 Mb on each side of a locus or 1 Mb in total):

# Load the GVC locus/gene table, normalise the column names, split the gene ID
# into ID and version, and drop the version plus two annotation columns.
gvc <- read_xlsx("GVC_1Mb_comparison_050224.xlsx") |>
  clean_names() |>
  separate(gene_id, into = c("gene_id", "version")) |>
  select(!c(version, agora_nominated_list, opentarget_info))

gvc
# One row per gene ID (the first occurrence is kept), ordered by gene symbol.
gvc.genes <- gvc |>
  select(gene_id, gene_symbol) |>
  distinct(gene_id, .keep_all = TRUE) |>
  arrange(gene_symbol)

gvc.genes

Agora

Alzheimer’s disease gene prioritization scores from Agora (see also related journal article):

# Agora overall scores, parsed from JSON and converted to a tibble.
ago1 <- as_tibble(
  read_json("syn25741025.overall_scores.json", simplifyVector = TRUE)
)

ago1

Alzheimer’s disease genes (AMPAD Agora) from Fanny:

# AMP-AD Agora gene list.
ago2 <- read_csv("AMPAD_agora_032124_gene-list.csv")
ago2
# Keep only the Agora score rows whose HGNC symbol appears in that gene list.
ago <- filter(ago1, hgnc_symbol %in% ago2[["Gene Symbol"]])

OpenTargets

Alzheimer’s disease gene prioritization scores from OpenTargets:

# Earlier OpenTargets export (v24_03), kept for reference:
# ot <- read_tsv("OT-MONDO_0004975-associated-targets-6_4_2024-v24_03.tsv", show_col_types = FALSE, na = "No data")

# Current export (v24_09); cells containing "No data" are read as NA.
ot <- read_tsv(
  file = "OT-MONDO_0004975-associated-targets-9_19_2024-v24_09.tsv",
  na = "No data",
  show_col_types = FALSE
)

ot

Add Ensembl gene IDs by mapping the OpenTargets gene symbols with g:Convert:

# Column names of the OpenTargets table, used to select them back after the join.
otcols <- colnames(ot)
# Map every OpenTargets gene symbol to Ensembl gene IDs with g:Convert.
# NOTE(review): mthreshold = Inf presumably keeps every match per symbol, so a
# symbol may yield several rows (and several symbols may share one Ensembl ID);
# filter_na = TRUE presumably drops symbols without a match — confirm against
# the gprofiler2 documentation.
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf,
  filter_na = TRUE) %>%
  # input_number is joined below against the row position of each symbol in
  # `ot`; both sides are converted to character so the key types agree.
  mutate(input_number = as.character(input_number)) %>%
  left_join(ot %>% rownames_to_column(var = "input_number"), by = "input_number") %>%
  # all_of() marks otcols as an external character vector; passing it bare is
  # deprecated in tidyselect and ambiguous if a column were ever named "otcols".
  select(ensembl_gene_id = target, all_of(otcols))

otensg

Overlaps between GVC, Agora, and OpenTargets genes

# Ensembl gene IDs contributed by each of the three sources.
x <- list(
  GVC = gvc.genes$gene_id,
  Agora = ago$ensembl_gene_id,
  OpenTargets = otensg$ensembl_gene_id
)
library(VennDiagram)
grid.newpage()
# filename = NULL: keep the diagram as a grid object and draw it here.
v <- venn.diagram(
  x,
  filename = NULL,
  fill = c("#FF0000", "#00FF00", "#0000FF")
)
grid.draw(v)

# Table of the diagram's partitions; the code below filters it by `..set..`
# and unnests `..values..` to get the gene IDs of a partition.
p <- get.venn.partitions(x)
p

Perform ORA of genes in overlaps

GVC ∩ Agora ∩ OpenTargets

# Genes shared by all three sources, annotated with Agora and OpenTargets scores
# and ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>% 
  filter(..set.. == "GVC∩Agora∩OpenTargets") %>%
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)
# save overlap gene ids for later
overlap_gene_ids <- query

GVC ∩ Agora

# Genes in both GVC and Agora (with or without OpenTargets), annotated with
# scores and ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>% 
  filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩Agora)∖(OpenTargets)")) %>%
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  # `symbol` comes from the OpenTargets table only, so it is NA for genes that
  # are not in OpenTargets; fall back to the Agora HGNC symbol for those.
  mutate(symbol = coalesce(symbol, hgnc_symbol)) %>%
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC ∩ OpenTargets

# Genes in both GVC and OpenTargets (with or without Agora), annotated with
# scores and ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>% 
  filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩OpenTargets)∖(Agora)")) %>%
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

Agora ∩ OpenTargets

# Genes in both Agora and OpenTargets (with or without GVC), annotated with
# scores and ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>% 
  filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(Agora∩OpenTargets)∖(GVC)")) %>%
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC ∩ Agora) ∪ (GVC ∩ OpenTargets) ∪ (Agora ∩ OpenTargets)

# Genes found in at least two of the three sources, annotated with scores and
# ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>% 
  filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩Agora)∖(OpenTargets)", "(GVC∩OpenTargets)∖(Agora)", "(Agora∩OpenTargets)∖(GVC)")) %>%
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  # `symbol` comes from the OpenTargets table only, so it is NA for genes that
  # are not in OpenTargets; fall back to the Agora HGNC symbol for those.
  mutate(symbol = coalesce(symbol, hgnc_symbol)) %>%
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

(Agora ∩ OpenTargets) ∖ (GVC)

# Genes in Agora and OpenTargets but not in GVC, annotated with scores and
# ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>%
  filter(..set.. == "(Agora∩OpenTargets)∖(GVC)") %>% 
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC ∩ OpenTargets) ∖ (Agora)

# Genes in GVC and OpenTargets but not in Agora, annotated with scores and
# ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>%
  filter(..set.. == "(GVC∩OpenTargets)∖(Agora)") %>% 
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

(OpenTargets) ∖ (GVC ∪ Agora)

# Genes found only in OpenTargets, annotated with scores and ranked by Agora
# genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>%
  filter(..set.. == "(OpenTargets)∖(GVC∪Agora)") %>% 
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC ∩ Agora) ∖ (OpenTargets)

# Genes in GVC and Agora but not in OpenTargets, annotated with scores and
# ranked by Agora genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>%
  filter(..set.. == "(GVC∩Agora)∖(OpenTargets)") %>% 
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  # `symbol` comes from the OpenTargets table only, so it is NA for every gene
  # in this partition; fall back to the Agora HGNC symbol.
  mutate(symbol = coalesce(symbol, hgnc_symbol)) %>%
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

(Agora) ∖ (GVC ∪ OpenTargets)

# Genes found only in Agora, annotated with scores and ranked by Agora
# genetics_score, then OpenTargets otGeneticsPortal.
genes <- p %>%
  filter(..set.. == "(Agora)∖(GVC∪OpenTargets)") %>% 
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  # `symbol` comes from the OpenTargets table only, so it is NA for every gene
  # in this partition; fall back to the Agora HGNC symbol.
  mutate(symbol = coalesce(symbol, hgnc_symbol)) %>%
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC) ∖ (Agora ∪ OpenTargets)

# Genes found only in GVC.
# NOTE(review): these gene IDs are absent from both `ago` and `otensg` (the
# Agora and OpenTargets sets were built from their ensembl_gene_id columns), so
# `symbol` and every score column are NA here and the "ranking" is just the
# random shuffle — confirm that ordered_query = TRUE is intended for this set.
genes <- p %>%
  filter(..set.. == "(GVC)∖(Agora∪OpenTargets)") %>% 
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
  arrange(desc(genetics_score), desc(otGeneticsPortal))

genes
# Unique gene IDs in the order produced above.
query <- genes %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

Perform ORA of GVC genes sorted by Agora or OpenTargets scores

GVC genes sorted by Agora’s genetics_score

Arrange by Agora’s genetics_score and OpenTargets’ otGeneticsPortal:

# All GVC genes with Agora and OpenTargets annotations, ranked by Agora
# genetics_score and then OpenTargets otGeneticsPortal.
d1 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  arrange(desc(genetics_score), desc(otGeneticsPortal)) %>% 
  # gene_symbol from the GVC table is kept; the source-specific symbols are dropped
  select(-c(symbol, hgnc_symbol)) %>% 
  select(gene_id, gene_symbol, genetics_score, otGeneticsPortal, everything())

d1
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d1 %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC genes sorted by OpenTargets’ otGeneticsPortal

Arrange by OpenTargets’ otGeneticsPortal and Agora’s genetics_score:

# All GVC genes with Agora and OpenTargets annotations, ranked by OpenTargets
# otGeneticsPortal and then Agora genetics_score.
d2 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  arrange(desc(otGeneticsPortal), desc(genetics_score)) %>% 
  # gene_symbol from the GVC table is kept; the source-specific symbols are dropped
  select(-c(symbol, hgnc_symbol)) %>% 
  select(gene_id, gene_symbol, otGeneticsPortal, genetics_score, everything())

d2
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d2 %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC genes sorted by Agora’s target_risk_score

Arrange by Agora’s target_risk_score and OpenTargets’ globalScore:

# All GVC genes with Agora and OpenTargets annotations, ranked by Agora
# target_risk_score and then OpenTargets globalScore.
d3 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  arrange(desc(target_risk_score), desc(globalScore)) %>% 
  # gene_symbol from the GVC table is kept; the source-specific symbols are dropped
  select(-c(symbol, hgnc_symbol)) %>% 
  select(gene_id, gene_symbol, target_risk_score, globalScore, everything())

d3
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d3 %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC genes sorted by OpenTargets’ globalScore

Arrange by OpenTargets’ globalScore and Agora’s target_risk_score:

# All GVC genes with Agora and OpenTargets annotations, ranked by OpenTargets
# globalScore and then Agora target_risk_score.
d4 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>% 
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>% 
  sample_frac(1L) %>% # shuffle rows first so tied scores do not keep the input order
  arrange(desc(globalScore), desc(target_risk_score)) %>% 
  # gene_symbol from the GVC table is kept; the source-specific symbols are dropped
  select(-c(symbol, hgnc_symbol)) %>% 
  select(gene_id, gene_symbol, globalScore, target_risk_score, everything())

d4
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d4 %>% distinct(gene_id) %>% pull(gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

Correlation of Agora and OpenTargets scores (GVC genes only)

# GVC genes with their Agora and OpenTargets scores side by side.
# NOTE(review): otensg may hold several rows per Ensembl ID (gconvert was run
# with mthreshold = Inf), which would repeat genes here — confirm no fan-out.
d.cor <- left_join(
  left_join(gvc.genes, ago, by = join_by(gene_id == ensembl_gene_id)),
  otensg,
  by = join_by(gene_id == ensembl_gene_id)
)
nrow(d.cor)
[1] 1347
# Rows that have both genetics scores.
nrow(drop_na(d.cor, genetics_score, otGeneticsPortal))
[1] 56
# Kendall rank correlation between the two genetics scores.
d.cor |>
  drop_na(genetics_score, otGeneticsPortal) |>
  summarize(
    cor = tidy(cor.test(genetics_score, otGeneticsPortal, method = "kendall"))
  ) |>
  unnest(cor)
nrow(d.cor)
[1] 1347
# Rows that have both overall scores.
nrow(drop_na(d.cor, target_risk_score, globalScore))
[1] 75
# Kendall rank correlation between the two overall scores.
d.cor |>
  drop_na(target_risk_score, globalScore) |>
  summarize(
    cor = tidy(cor.test(target_risk_score, globalScore, method = "kendall"))
  ) |>
  unnest(cor)

Correlation of Agora and OpenTargets scores (all genes)

# Every Agora gene with its OpenTargets scores attached (NA when not found).
# NOTE(review): otensg may hold several rows per Ensembl ID (gconvert was run
# with mthreshold = Inf), which would repeat genes here — confirm no fan-out.
d.cor <- left_join(ago, otensg, by = "ensembl_gene_id")
nrow(d.cor)
[1] 925
# Rows that have both genetics scores.
nrow(drop_na(d.cor, genetics_score, otGeneticsPortal))
[1] 75
# Kendall rank correlation between the two genetics scores.
d.cor |>
  drop_na(genetics_score, otGeneticsPortal) |>
  summarize(
    cor = tidy(cor.test(genetics_score, otGeneticsPortal, method = "kendall"))
  ) |>
  unnest(cor)
nrow(d.cor)
[1] 925
# Rows that have both overall scores.
nrow(drop_na(d.cor, target_risk_score, globalScore))
[1] 488
# Kendall rank correlation between the two overall scores.
d.cor |>
  drop_na(target_risk_score, globalScore) |>
  summarize(
    cor = tidy(cor.test(target_risk_score, globalScore, method = "kendall"))
  ) |>
  unnest(cor)

GVC loci annotated with genes in overlaps

GVC ∩ Agora ∩ OpenTargets

# Unique gene IDs in the triple overlap GVC ∩ Agora ∩ OpenTargets.
# Only the IDs are needed, so the partition is not joined to the Agora and
# OpenTargets tables (their columns were discarded straight away).
gene_ids <- p %>%
  filter(..set.. == "GVC∩Agora∩OpenTargets") %>%
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  distinct(gene_id) %>%
  pull(gene_id)

length(gene_ids)
[1] 75
# For each GVC locus, list the symbols of its genes that are in `gene_ids`,
# joined with " | " in gene-symbol order, and render the result with gt.
# Alternative label combining ID and symbol, kept from the original:
# unite(gene, gene_id, gene_symbol, sep = ":", remove = FALSE)
gvc |>
  filter(gene_id %in% gene_ids) |>
  arrange(gene_symbol) |>
  distinct(gvc_locus = grouped_loci_gvc, gene = gene_symbol) |>
  group_by(gvc_locus) |>
  summarize(genes = str_c(gene, collapse = " | ")) |>
  gt::gt()
gvc_locus genes
ABCA7 ABCA7 | NDUFS7
ABI3 / ACE NGFR | ZNF652
ACE ACE
ADAM10 / MINDY2 ADAM10 | ALDH1A2 | LIPC
ADAMTS4 ADAMTS4 | FCER1G | NDUFS2
ANK3 / CCDC6 CCDC6 | SLC16A9
ANKRD31 ANKRD31 | ENC1
APH1B LACTB
APOE / TOMM40 APOC1 | APOE | BCAM | MARK4 | NECTIN2
APP MRPL39
APP / ADAMTS1 ADAMTS1
BCKDK / KAT8 / VKORC1 BCKDK | STX4 | VKORC1
BIN1 BIN1
CASS4 CASS4
CD2AP CD2AP
CD33 CD33
CHRNE ENO3 | RABEP1 | SLC25A11 | ZFP3
CLU / PTK2B CLU | EPHX2 | PTK2B | SCARA3
CR1 CR1
CTSH CTSH
DOC2A DOC2A
ECHDC3 / USP6NL USP6NL
EED / PICALM DLG2 | PICALM
EPHA1 / EPHA1-AS1 EPHA1
HAVCR2 CYFIP2 | HAVCR2
HLA HLA-DRA | HLA-DRB1
ICA1 NXPH1
IDUA CPLX1
IL34 MTSS2
INPP5D INPP5D
LILRB2 / TMC4 LAIR1
MADD / SPI1 C1QTNF4 | NDUFS3 | NR1H3 | RAPSN | SPI1
MS4A / MS4A2 / MS4A4A / MS4A6A MRPL16 | MS4A2 | MS4A4A | MS4A6A
NDUFAF7 / PRKD3 QPCT
NYAP1 / PILRA / SPDYE3 / ZCWPW1 NYAP1
OARD1 / TREM2 / TREML2 / UNC5CL TREM2
PLCG2 PLCG2 | SDR42E1
PLEKHA1 HTRA1
RABEP1 / SCIMP ENO3 | RABEP1 | SLC25A11 | ZFP3
RASGEF1C MAPK9
RIN3 / SLC24A4 RIN3 | SLC24A4
SHARPIN PLEC
SIGLEC11 NR1H2
WNT3 NSF

Perform ORA of Agora and OpenTargets genes sorted by global or genetic score

Agora genes sorted by genetics_score

# Agora genes that have a genetics_score, highest score first.
d5 <- ago %>%
  drop_na(genetics_score) %>%
  arrange(desc(genetics_score))

d5
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d5 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

OpenTargets genes sorted by otGeneticsPortal

# OpenTargets genes that have an otGeneticsPortal score, highest score first.
d6 <- otensg %>%
  drop_na(otGeneticsPortal) %>%
  arrange(desc(otGeneticsPortal))

d6
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d6 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

Agora genes sorted by target_risk_score

# Agora genes that have a target_risk_score, highest score first.
d7 <- ago %>%
  drop_na(target_risk_score) %>%
  arrange(desc(target_risk_score))

d7
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d7 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

OpenTargets genes sorted by globalScore

# OpenTargets genes that have a globalScore, highest score first.
d8 <- otensg %>%
  drop_na(globalScore) %>%
  arrange(desc(globalScore))

d8
# Unique gene IDs in ranked order; the order matters because ordered_query = TRUE.
query <- d8 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# gost() returns NULL when no term is significant; without the guards the
# select() and gostplot() calls would fail and stop the whole notebook.
if (!is.null(gostres)) gostres$result %>% select(term_name, term_id, source, everything())
if (!is.null(gostres)) gostplot(gostres, capped = FALSE, interactive = TRUE)

Check missing OpenTargets scores in Brian’s table

# Gene symbols and the OpenTargets scores recorded in the working copy of
# GVC Table 1C; cells containing "No data" are read as NA.
t <- read_xlsx(
  "8-23-2024 - GVC Table 1C - WORKING COPYL_MRC.xlsx",
  skip = 1,
  na = "No data"
) |>
  janitor::clean_names() |>
  select(
    gvc_expanded_list_of_possible_genes_500kb,
    open_target_scores_global,
    open_target_scores_genetics
  ) |>
  rename(symbol = gvc_expanded_list_of_possible_genes_500kb)

# Global score: rows where the table and the OpenTargets export disagree.
# A plain `!=` yields NA when either side is missing and filter() drops those
# rows, so a score missing on exactly one side is flagged explicitly with xor().
t |>
  left_join(ot, by = "symbol") |>
  filter(
    xor(is.na(open_target_scores_global), is.na(globalScore)) |
      round(open_target_scores_global, 4) != round(globalScore, 4)
  ) |>
  select(symbol, open_target_scores_global, globalScore)

# Genetics score: same comparison.
t |>
  left_join(ot, by = "symbol") |>
  filter(
    xor(is.na(open_target_scores_genetics), is.na(otGeneticsPortal)) |
      round(open_target_scores_genetics, 4) != round(otGeneticsPortal, 4)
  ) |>
  select(symbol, open_target_scores_genetics, otGeneticsPortal)